# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import pickle


def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Reframe a time series as a supervised-learning table via a sliding window.

    Each output row holds the ``n_in`` lagged observations followed by the
    current observation and the next ``n_out - 1`` observations.

    Args:
        data: Anything ``pd.DataFrame`` accepts: a flat sequence or 1-D array
            (one variable), or a sequence of rows / 2-D array / DataFrame
            (one column per variable).
        n_in: Number of lagged steps (t-n_in ... t-1) used as inputs.
        n_out: Number of forecast steps (t ... t+n_out-1) used as outputs.
        dropnan: If True, drop every row containing NaN. That removes the
            incomplete windows at both ends of the series, and also any row
            whose source data was already missing.

    Returns:
        A DataFrame whose columns are named ``load<j>(t-<i>)``, ``load<j>(t)``
        and ``load<j>(t+<i>)``, where ``j`` is the 1-based variable index.
    """
    frame = pd.DataFrame(data)
    # Take the variable count from the frame that is actually shifted, so the
    # generated names always match the concatenated columns -- including for
    # nested lists, tuples and 1-D inputs.
    n_vars = frame.shape[1]

    cols, names = [], []
    # Input sequence (t-n_in, ..., t-1): oldest lag first.
    for lag in range(n_in, 0, -1):
        cols.append(frame.shift(lag))
        names += ['load%d(t-%d)' % (j + 1, lag) for j in range(n_vars)]
    # Forecast sequence (t, t+1, ..., t+n_out-1).
    for step in range(n_out):
        cols.append(frame.shift(-step))
        if step == 0:
            names += ['load%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['load%d(t+%d)' % (j + 1, step) for j in range(n_vars)]

    # Put the shifted copies side by side and label them.
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    if dropnan:
        agg.dropna(inplace=True)
    return agg

if __name__ == '__main__':
    # Script entry point: build a standardized monthly incident-count series
    # from the dispatch workbook, reframe it with a sliding window, and save
    # the intermediate tables plus the fitted scaler.
    path = r'../附件/附件2：某地消防救援出警数据.xlsx'
    time_series = pd.read_excel(path)

    # Bucket the records by calendar month on the '接警日期' column.
    # NOTE(review): the non-null count of the second remaining column is used
    # as the number of incidents per month -- this assumes that column has no
    # missing values; confirm against the workbook.
    monthly_counts = time_series.groupby(
        pd.Grouper(freq='1M', key='接警日期')).count()
    data = monthly_counts.iloc[:, 1].values

    month_data = pd.DataFrame(data=data, columns=['事故次数'])
    cols = month_data.columns

    # Standardize the counts; the fitted scaler is saved at the end of the
    # script.
    scaler = StandardScaler()
    data_after = scaler.fit_transform(month_data)
    data_after = pd.DataFrame(data_after, columns=cols)
    data_after.to_excel(r'../附件/标准化后的次数数据（月）.xlsx')

    # Sliding window: 5 lagged months as inputs, the current month as output.
    data_window = series_to_supervised(data_after.values,
                                       n_in=5, n_out=1, dropnan=True)
    data_window.to_excel(r'../附件/滑动窗口后的次数数据（月）.xlsx')

    # Use a context manager so the pickle file is flushed and closed
    # deterministically instead of relying on garbage collection.
    with open(r'../附件/scaler.pkl', 'wb') as scaler_file:
        pickle.dump(scaler, scaler_file)
